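Save the cleaned data to the intermediate_data directory.
Read in the manual checks data from External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks.
Clean the column names using clean_names().
Convert the collected column from a character into a POSIXct date format.
Remove rows with missing values in the stripchart_stage column.
#create file path to call the data from Box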
## Mia's file path
# NOTE(review): this is an absolute, user-specific path; anyone else running
# the script has to point it at their own Box sync folder.
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L0_subdaily/ManualChecks"

# Read in the manual checks data.
# `filepath` is already absolute, so it is joined with file.path(); here() is
# meant for project-relative paths and only passes absolute paths through
# unchanged on recent rprojroot versions.
mc <- read_csv(file.path(filepath, "2017-2021_S2Stage.csv"))

# Clean the data: keep the S2 weir rows, parse the timestamp, derive the
# year/date helper columns, and restrict to 2017 - 2019 with a recorded stage.
mc_clean <- mc %>%
  clean_names() %>%
  # remove S2 lagging pool data and keep only the S2 weir data
  subset(name == "S2 WEIR") %>%
  mutate(
    # parse the character timestamp once; the derived columns reuse the result
    collected = as.POSIXct(collected, format = "%m/%d/%Y %H:%M", tz = "GMT"),
    # year and date stay character columns, as before
    year = format(collected, format = "%Y"),
    date = format(collected, format = "%Y-%m-%d")
  ) %>%
  # compare years numerically instead of relying on string collation
  subset(as.numeric(year) >= 2017 & as.numeric(year) <= 2019) %>%
  subset(!is.na(stripchart_stage))

# Save clean data CSV to use in future RMD files
write.csv(
  x = mc_clean,
  file = here("intermediate_data", "mc_clean.csv"),
  row.names = FALSE
)

# Plot the manual checks through time
ggplot(data = mc_clean) +
  geom_point(aes(x = collected, y = stripchart_stage)) +
  theme_classic() +
  labs(
    y = "Stream Height (ft)",
    x = "Time",
    title = "S2 Weir Manual Streamflow Checks (2017 - 2019)"
  ) +
  theme(plot.title = element_text(hjust = 0.5))
Combine all 3 water years using rbind()
Plot stripchart data (as lines) then add the manual checks (as points) on top
# Stack the three water-year tables (2017 - 2019) into a single record
all_streamflow <- rbind(wy2017_clean, wy2018_altered, wy2019_clean)

# Write the combined record out so later RMD files can reuse it
write.csv(
  all_streamflow,
  file = here("intermediate_data", "all_streamflow.csv"),
  row.names = FALSE
)

# Stripchart record drawn as a thin line, manual checks overlaid as red points
p_all <- ggplot(all_streamflow, aes(x = datetime, y = stream_height_ft)) +
  geom_line(size = 0.25) +
  geom_point(
    data = mc_clean,
    mapping = aes(x = collected, y = stripchart_stage),
    color = "red",
    size = 0.5
  ) +
  labs(
    title = "WY 2017 - 2019 S2 Bog Stream Height",
    subtitle = "Stripchart data are plotted as black lines. Manual checks are plotted as red dots.",
    x = "Time",
    y = "Stream Height (ft)"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Static version (left disabled)
#p_all

# Export the figure as a PNG under figures/
ggsave(
  "streamflow_with_manual_checks.png",
  plot = p_all,
  path = "figures/",
  width = 6,
  height = 3,
  units = "in",
  dpi = 300
)

# Interactive version
ggplotly(p_all)
Next, we are interested in a 1:1 comparison of the stripchart data vs
the manual checks data at the exact same timestamp. Since the
stripchart timestamps and the manual check timestamps do not align
perfectly, we will linearly interpolate the stripchart stream height
values using na.approx() from the zoo package to estimate stripchart
values at the time of the manual checks.
# Clean the manual checks data so it can be joined to the stripchart record
mc_sub <- mc_clean %>%
  mutate(
    # extract the calendar date (without the time of day), then store it as a
    # POSIXct at midnight GMT
    date = format(
      as.POSIXct(collected, format = "%m/%d/%Y %H:%M:%S", tz = "GMT"),
      format = "%Y-%m-%d"
    ),
    date = as.POSIXct(date, tz = "GMT"),
    year = as.numeric(year)
  ) %>%
  # rename column to match the stripchart timestamp column
  rename(datetime = collected) %>%
  # remove unnecessary columns
  select(-site, -lab_id, -name, -point_gage, -logger_stage)

# Identify the date range of interest: 2017-04-04 to 2019-12-31,
# aka the range of the manual checkpoint data
max_date <- max(mc_sub$date)
min_date <- min(mc_sub$date)
nrows <- nrow(mc_sub)

# Interpolate the stripchart record, then subset it to the time range of the
# manual checkpoints
streamflow_sub <- all_streamflow %>%
  mutate(
    # Linearly interpolate NA gaps in the stripchart values.
    # na.rm = FALSE keeps the result the same length as the input: by default
    # na.approx() trims leading/trailing NAs, which makes mutate() fail when
    # the record starts or ends with a missing value.
    # NOTE(review): interpolation runs over row position, not over datetime;
    # confirm the record is evenly spaced and sorted by time.
    approx = na.approx(stream_height_ft,
                       method = "linear",
                       na.rm = FALSE),
    year = format(
      as.POSIXct(datetime, format = "%m/%d/%Y %H:%M:%S", tz = "GMT"),
      format = "%Y"
    ),
    year = as.numeric(year)
  ) %>%
  # NOTE(review): assumes all_streamflow has a `date` column comparable with
  # the POSIXct min_date/max_date - confirm upstream.
  subset(date <= max_date & date >= min_date)

# Plot the recorded stripchart values vs the interpolated values, with the
# manual checks on top
p_interpolate <- ggplot() +
  # plot recorded streamflow
  geom_line(data = streamflow_sub,
            aes(x = datetime, y = stream_height_ft),
            color = "red") +
  # plot interpolated streamflow
  geom_line(data = streamflow_sub,
            aes(x = datetime, y = approx),
            color = "blue",
            alpha = 0.3) +
  # plot manual checks
  geom_point(data = mc_sub,
             aes(x = datetime, y = stripchart_stage),
             size = 0.75) +
  theme_classic() +
  labs(x = "Time",
       y = "Stream Height (ft)",
       title = "S2 Streamflow, Interpolated Streamflow, and Manual Checks",
       # typo fixed: "streaflow" -> "streamflow"
       subtitle = "Streamflow is red. \n Interpolated streamflow is blue. \n Manual checks are black points. ") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

# static plot
p_interpolate

# interactive plot
ggplotly(p_interpolate)
#join the stripchart data and manual checks data
#a full join keeps every row of both tables; rows are matched on timestamp, date and year
fj <- full_join(x = mc_sub,
y = streamflow_sub,
by = c("datetime", "date", "year")) %>%
#rename columns: manual check stage -> manual_check, raw stripchart stage -> stripchart
rename(manual_check = stripchart_stage,
stripchart = stream_height_ft) %>%
#rearrange dataframe into a long format
#manual_check and approx are stacked into a single stream_height_ft column, labelled by types
pivot_longer(cols = c("manual_check", "approx"),
names_to = "types",
values_to = "stream_height_ft") %>%
#interpolate to fill in missing NA values
#NOTE(review): this interpolates over row position in the stacked column, so gaps in one
#series appear to be filled from neighbouring values of the other series, in join order
#rather than time order - confirm this is the intended estimate
#NOTE(review): na.approx() trims leading/trailing NAs by default (na.rm = TRUE), which
#would make the result shorter than the column - confirm the ends are never NA
mutate(approx2 = na.approx(stream_height_ft,
method = "linear")) %>%
#remove extraneous columns
select(-stream_height_ft) %>%
#return to wide format to create scatterplot
pivot_wider(
names_from = "types",
values_from = "approx2"
) %>%
#unlist the columns created by pivot_wider()
#NOTE(review): unnest is called without naming the columns to unnest - verify against
#the installed tidyr version
unnest
#plot all data: manual check value against interpolated stripchart value
ggplot(data = fj) +
geom_point(aes(x = manual_check,
y = approx))
#left join the joined data and manual checks data to keep only the timestamps of interest
lj <- left_join(x = mc_sub,
y = fj,
by = c("datetime", "year", "date")) %>%
#ensure the correct column types
mutate(manual_check = as.numeric(manual_check),
stripchart = as.numeric(stripchart))
#test if the subsetted data has the same number of observations as the manual checks dataframe
#stops the script with an error when the left join added or dropped rows
if(nrow(lj) != (nrow(mc_sub))) stop("Check lj dataframe dimensions")
#save clean data CSV to use in future RMD files
write.csv(x = lj,
file = file.path(here("intermediate_data", "streamflow_mc_lj.csv")),
row.names = FALSE)
# 1:1 scatter of manual check stage against the interpolated stripchart stage,
# with the y = x reference line; both axes are limited to 0 - 0.4
p_1to1 <- ggplot() +
  geom_point(
    mapping = aes(x = manual_check, y = approx),
    data = lj,
    alpha = 0.75
  ) +
  geom_abline(intercept = 0, slope = 1) +
  lims(x = c(0, 0.4), y = c(0, 0.4)) +
  labs(
    title = "S2 Manual Checks vs Interpolated Stripchart Values (2017 - 2019)",
    subtitle = "The line y = x is plotted for reference.",
    x = "Manual Checkpoints",
    y = "Stripchart Data"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )

# Static version
p_1to1

# Export the figure as a PNG under figures/
ggsave(
  "streamflow_mc_comparison.png",
  plot = p_1to1,
  path = "figures/",
  width = 6,
  height = 5,
  units = "in",
  dpi = 300
)

# Interactive version
ggplotly(p_1to1)
# Calculate the difference between the stripchart value and the manual check
# value at each manual check timestamp.
# The joined table is `lj`; `streamflow_mc_lj` is only the name of the CSV it
# was saved to and is not an object in this script.
# NOTE(review): `stripchart` is the recorded (non-interpolated) stage, while
# the 1:1 plot uses the interpolated `approx` column - confirm which one the
# difference should be based on.
streamflow_mc_diff <- lj %>%
  mutate(diff = stripchart - manual_check)

# Save clean data CSV to use in future RMD files
write.csv(
  x = streamflow_mc_diff,
  file = here("intermediate_data", "streamflow_mc_diff.csv"),
  row.names = FALSE
)

# Plot the difference in values (stripchart - manual checks) through time
p_diff <- ggplot(data = streamflow_mc_diff) +
  geom_point(aes(x = datetime, y = diff),
             size = 0.5) +
  geom_line(aes(x = datetime, y = diff)) +
  theme_classic() +
  labs(x = "Time",
       y = "Difference in Stream Height (ft)",
       title = "Difference Between Stripcharts and Manual Checks",
       subtitle = "Differences were calculated as stripchart value - manual checks value.") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

# static plot
p_diff

# save the figure
ggsave(filename = "streamflow_mc_diff.png",
       plot = p_diff,
       path = "figures/",
       width = 6,
       height = 3,
       units = c("in"),
       dpi = 300)
Read in the shaft encoder data from the External-MEF_DATA Box folder: External-MEF_DATA/Hydro/Streamflow/L1_subdaily.
Clean and manipulate the data into a tidy format using clean_names().
Plot the old shaft encoder data.
# Create file path to call the data from Box
## Mia's file path
# NOTE(review): this is an absolute, user-specific path; anyone else running
# the script has to point it at their own Box sync folder.
filepath <- "/Users/miaforsline/Library/CloudStorage/Box-Box/External-MEF_DATA/Hydro/Streamflow/L1_subdaily/"

# Read in the 2017 - 2019 5-minute S2 data.
# `filepath` is already absolute, so it is joined with file.path(); here() is
# meant for project-relative paths and only passes absolute paths through
# unchanged on recent rprojroot versions.
se_data <- read_csv(file.path(filepath, "S2_5min_2017-2019.csv"))

# Clean the data: standardise the column names, drop the columns that are not
# used below, and rename the timestamp column to datetime
se_clean <- se_data %>%
  clean_names() %>%
  select(-x1, -record, -cond, -ct, -streamflow_cfs, -streamflow_lps, -temp_c) %>%
  rename(datetime = timestamp)

# Save clean data CSV to use in future RMD files
write.csv(
  x = se_clean,
  file = here("intermediate_data", "se_clean.csv"),
  row.names = FALSE
)

# Plot the stage record through time
ggplot() +
  geom_line(data = se_clean, aes(x = datetime, y = stage)) +
  theme_classic() +
  labs(x = "Time",
       y = "Stream Height (ft)",
       title = "Shaft Encoder S2 Bog (2017 - 2019)") +
  theme(plot.title = element_text(hjust = 0.5))